# Descriptive tables

library(dplyr)
library(haven)
library(table1)
#install.packages("xfun", type="binary")

# Read the SPSS input file and convert its labelled columns to factors.
df <- as_factor(
  read_sav("drafts/conjoint_solidarity/perspectives_docs/revision_2/replication_code/input_data.sav")
)

# Raw distribution of polit_90_help_rus before any recoding.
table(df$polit_90_help_rus)

# Keep one row per ResponseId, then only the respondents who answered "Да"
# (yes) to both when_left and location. Rows with NA in either column are
# dropped by filter().
df <- df %>%
  distinct(ResponseId, .keep_all = TRUE) %>%
  filter(when_left == "Да", location == "Да")


# soc-dem variables manipulation for descriptive tables
# df <- df %>%
#   mutate(sex_cat = factor(case_when(sex == "Женщина" ~ "Woman",
#                       sex == "Мужчина" ~ "Man",
#                       sex == "Другое" ~ NA)) %>% structure(label = "Sex"))

# Age bands for the descriptive tables. case_when() uses the first matching
# branch, so each upper bound implicitly excludes the previous band
# ("25-35" therefore means older than 25 and up to 35). Missing ages stay NA.
df <- df %>%
  mutate(
    age_respond_cat = structure(
      factor(
        case_when(
          age_respondent <= 25 ~ "25 and less",
          age_respondent <= 35 ~ "25-35",
          age_respondent <= 45 ~ "35-45",
          age_respondent > 45 ~ "45 and more"
        ),
        levels = c("25 and less", "25-35", "35-45", "45 and more")
      ),
      label = "Age (categorical)"
    )
  )

#label(df$age_respondent) <- "Age"

# Collapse education into three groups.
# education is first overwritten with as.numeric(); for a factor column this
# yields the level codes (NOTE(review): assumes the six answer options are
# stored in ascending order of attainment - confirm against the codebook).
# Codes outside 1-6 and missing values end up as NA in education_cat.
df <- df %>%
  mutate(
    education = as.numeric(education),
    education_cat = structure(
      factor(
        case_when(
          education %in% 1:2 ~ "Lower and upper secondary",
          education %in% 3:4 ~ "Advanced vocational",
          education %in% 5:6 ~ "Higher education (specialist, BA, MA, PhD)"
        ),
        levels = c(
          "Lower and upper secondary",
          "Advanced vocational",
          "Higher education (specialist, BA, MA, PhD)"
        )
      ),
      label = "Education"
    )
  )

# Consumption-based income scale: codes 1-6 are mapped one-to-one onto their
# English labels, in that order. Any other value (including NA) becomes NA.
df <- df %>%
  mutate(
    income_cat = structure(
      factor(
        income_consum_num,
        levels = 1:6,
        labels = c(
          "Can't buy food",
          "Can buy food, can't buy clothes",
          "Can buy food and clothes, can't buy TV",
          "Can buy TV, can't buy a car",
          "Can buy a car",
          "Can afford anything"
        )
      ),
      label = "Income (categorical)"
    )
  )


# Children status: "Да" (yes) / "Нет" (no) recoded to English labels; any
# other answer, including NA, stays NA.
# The "no" label is kept on a single line: a string literal that is broken
# across source lines embeds the line break and the indentation in the label
# that ends up in the printed table.
df <- df %>%
  mutate(
    child_bin_cat = structure(
      factor(case_when(
        child_bin == "Да" ~ "Have children",
        child_bin == "Нет" ~ "Doesn't have children"
      )),
      label = "Children status"
    )
  )


# Cleaning the country variable and translating it into English.
# country_mapping is a named character vector used as a lookup table:
# names are the country spellings as they appear in the data (mostly Russian),
# values are the English names used in the rest of the script.
# Several spellings deliberately collapse onto one English name
# (e.g. "Турция"/"Türkiye" -> "Turkey", "Грузия"/"Сакартвело" -> "Georgia",
# "Черногория"/"Монтенегро" -> "Montenegro", "Южная Африка"/"ЮАР" -> "South Africa",
# "США"/"United States" -> "USA").
# A spelling that is not listed here yields NA when looked up.
country_mapping <- c("Армения" = "Armenia", "Германия" = "Germany", "Испания" = "Spain", 
                     "Нидерланды" = "Netherlands", "Грузия" = "Georgia", "Турция" = "Turkey", 
                     "Греция" = "Greece", "Эстония" = "Estonia", "Литва" = "Lithuania", 
                     "Таиланд" = "Thailand", "Канада" = "Canada", "Черногория" = "Montenegro",
                     "Сербия" = "Serbia", "Португалия" = "Portugal", "Израиль" = "Israel",
                     "Кипр" = "Cyprus", "Австрия" = "Austria", "ОАЭ" = "UAE", 
                     "Норвегия" = "Norway", "Польша" = "Poland", "Великобритания" = "United Kingdom",
                     "Франция" = "France", "Венгрия" = "Hungary", "Швеция" = "Sweden", 
                     "США" = "USA", "Кыргызстан" = "Kyrgyzstan", "Аргентина" = "Argentina",
                     "Финляндия" = "Finland", "Казахстан" = "Kazakhstan", "Чили" = "Chile",
                     "Индия" = "India", "КНДР" = "North Korea", "Южная Африка" = "South Africa",
                     "Вьетнам" = "Vietnam", "Индонезия" = "Indonesia", "Словакия" = "Slovakia", 
                     "Болгария" = "Bulgaria", "Швейцария" = "Switzerland", 
                     "Люксембург" = "Luxembourg", "Бельгия" = "Belgium", "Латвия" = "Latvia",
                     "Албания" = "Albania", "Азербайджан" = "Azerbaijan", "Египет" = "Egypt", 
                     "Бразилия" = "Brazil", "Северная Македония" = "North Macedonia",
                     "Мексика" = "Mexico", "Хорватия" = "Croatia", "Австралия" = "Australia",
                     "Южная Корея" = "South Korea", "Япония" = "Japan", "Узбекистан" = "Uzbekistan", 
                     "Тунис" = "Tunisia", "Италия" = "Italy", "Филиппины" = "Philippines", "Коста-Рика" = "Costa Rica", 
                     "Новая Зеландия" = "New Zealand", "Чехия" = "Czech Republic",
                     "Украина" = "Ukraine", "Шри-Ланка" = "Sri Lanka", "ЮАР" = "South Africa",
                     "Ирландия" = "Ireland", "Саудовская Аравия" = "Saudi Arabia", 
                     "Молдавия" = "Moldova", "Дания" = "Denmark", "Марокко" = "Morocco",
                     "Румыния" = "Romania", "Катар" = "Qatar", "Малайзия" = "Malaysia",
                     "Колумбия" = "Colombia", "Исландия" = "Iceland", "Словения" = "Slovenia",
                     "Кения" = "Kenya", "Эквадор" = "Ecuador", "Белоруссия" = "Belarus",
                     "Türkiye" = "Turkey", "Мальта" = "Malta", "Шотландия" = "Scotland",
                     "Маврикий" = "Mauritius", "Уругвай" = "Uruguay", "Монтенегро" = "Montenegro",
                     "Сакартвело" = "Georgia", "United States" = "USA", "China" = "China")

# Apply the mapping to create a new column with English country names.
# The lookup goes through as.character(): indexing a named vector with a
# factor would use the factor's integer codes instead of its labels and
# silently return the wrong countries. unname() drops the lookup keys so the
# column is a plain character vector. Answers missing from country_mapping
# (and NA answers) become NA.
df <- df %>%
  mutate(
    country_english = unname(country_mapping[as.character(country_now_merged)])
  )

# Up to six most frequent countries, most frequent first. head() is used
# instead of [1:6] so that no NA entries appear when fewer than six distinct
# countries are present.
popular_countries <- names(
  head(sort(table(df$country_english), decreasing = TRUE), 6)
)


# Adding regional variables.
# Each vector lists English country names exactly as they are produced by the
# country translation step, so membership tests against country_english work.

# "EU" is used loosely here: the list also contains countries that are not EU
# member states (Montenegro, Norway, United Kingdom).
eu_countries <- c("Germany", "Spain", "Netherlands", "Greece",
                  "Estonia", "Lithuania", "Portugal", "Cyprus",
                  "Austria", "Norway", "Poland", "United Kingdom",
                  "France", "Hungary", "Sweden", "Belgium", "Latvia",
                  "Bulgaria", "Luxembourg", "Czech Republic", "Italy",
                  "Ireland", "Romania", "Malta", "Slovenia", "Montenegro")

# Estonia and Lithuania appear both here and in eu_countries; which label a
# respondent receives depends on the branch order where the vectors are used.
# NOTE(review): "North Korea" is listed here although it is not a post-Soviet
# state, while Latvia, Azerbaijan and Uzbekistan are absent - confirm that
# this is the intended classification before changing it.
post_soviet_countries <- c("Armenia", "Georgia", "Estonia", "Lithuania",
                           "Kyrgyzstan", "Kazakhstan", "Ukraine", "Belarus",
                           "Moldova", "North Korea")

north_american_countries <- c("Canada", "USA")

# The Emirates are spelled "UAE" because that is the English value the
# country mapping assigns to "ОАЭ"; the long form would never match.
asian_countries <- c("Turkey", "Thailand", "UAE", "India",
                     "South Korea", "Japan", "Uzbekistan", "Philippines",
                     "China")

# Region and country-category labels derived from country_english.
# Branch order matters for countries that sit in both lists:
#   * region:      the EU list is checked first, so they become "EU";
#   * country_cat: the six most frequent countries keep their own name, then
#                  the post-Soviet list is checked before the EU list.
# Everything else, including a missing country, falls into "Other".
df <- df %>%
  mutate(
    region = factor(
      case_when(
        country_english %in% eu_countries ~ "EU",
        country_english %in% post_soviet_countries ~ "Post-Soviet",
        TRUE ~ "Other"
      ),
      levels = c("Post-Soviet", "EU", "Other")
    ),
    country_cat = structure(
      factor(
        case_when(
          country_english %in% popular_countries ~ country_english,
          country_english %in% post_soviet_countries ~ "Post-Soviet",
          country_english %in% eu_countries ~ "EU",
          TRUE ~ "Other"
        ),
        levels = c(popular_countries, "Post-Soviet", "EU", "Other")
      ),
      label = "Country"
    )
  )



# Three-level versions of the responsibility and guilt scales. The end points
# of the scale carry their Russian anchor text ("...1", "...5"); the inner
# points are bare digits. Answers 1-2 -> "Low", 3 -> "Middle", 4-5 -> "High";
# anything else stays NA. No explicit levels are given, so factor() orders
# them alphabetically (High, Low, Middle).
#
# Binary variants, currently disabled:
# responsiblity_bin = factor(case_when(responsiblity %in% c("Совершенно не чувствую1","2") ~ 0,
#                                      responsiblity %in% c("Сильно чувствую5","4") ~ 1)),
# guilt_bin = factor(case_when(guilt %in% c("Совершенно не чувствую1","2") ~ 0,
#                              guilt %in% c("Сильно чувствую5", "4") ~ 1)),
df <- df %>%
  mutate(across(
    c(responsiblity, guilt),
    function(answer) {
      factor(case_when(
        answer %in% c("Совершенно не чувствую1", "2") ~ "Low",
        answer %in% "3" ~ "Middle",
        answer %in% c("Сильно чувствую5", "4") ~ "High"
      ))
    },
    .names = "{.col}_3"
  ))

# Numeric (1-5) versions of the responsibility and guilt scales.
# The two anchored end points are replaced by their digit, every other answer
# is passed through as text, and the result is converted with as.numeric()
# (text that is not a number becomes NA with a coercion warning).
df <- df %>%
  mutate(
    responsibility_num = as.numeric(case_when(
      as.character(responsiblity) == "Совершенно не чувствую1" ~ "1",
      as.character(responsiblity) == "Сильно чувствую5" ~ "5",
      TRUE ~ as.character(responsiblity)
    )),
    guilt_num = as.numeric(case_when(
      as.character(guilt) == "Совершенно не чувствую1" ~ "1",
      as.character(guilt) == "Сильно чувствую5" ~ "5",
      TRUE ~ as.character(guilt)
    ))
  )

# Display labels picked up by table1().
label(df$responsibility_num) <- "Responsibility for war in Ukraine"
label(df$guilt_num) <- "Guilt for war in Ukraine"



# Countries that share a border with Russia (English names).
neighbours <- c(
  "Norway", "Finland", "Estonia", "Latvia", "Lithuania", "Poland",
  "Belarus", "Ukraine", "Georgia", "Azerbaijan", "Kazakhstan",
  "China", "Mongolia", "North Korea"
)

# Same list without Belarus, Azerbaijan, Kazakhstan, China and North Korea.
# setdiff() keeps the remaining entries in their original order.
neighbours_no_allies <- setdiff(
  neighbours,
  c("Belarus", "Azerbaijan", "Kazakhstan", "China", "North Korea")
)

# CIS countries (СНГ).
cis_countries <- c(
  "Armenia", "Azerbaijan", "Belarus", "Kazakhstan",
  "Kyrgyzstan", "Moldova", "Russia", "Tajikistan",
  "Uzbekistan"
)

# Row-wise total over a set of item columns: NA when every item in the row is
# missing, otherwise the sum of the non-missing items.
sum_unless_all_missing <- function(items) {
  every_item_missing <- rowSums(is.na(items)) == ncol(items)
  ifelse(every_item_missing, NA, rowSums(items, na.rm = TRUE))
}

df <- df %>%
  mutate(
    # Use the "safe" meeting item, falling back to the "unsafe" one when the
    # former is missing.
    polit_90_meet_any = if_else(
      is.na(polit_90_meet_safe),
      polit_90_meet_unsafe,
      polit_90_meet_safe
    ),
    # Civic index: total over the five help / volunteering items.
    civic_index = sum_unless_all_missing(
      dplyr::select(
        .,
        polit_merged_help_ukr,
        polit_merged_help_rus,
        polit_merged_help_ngo,
        polit_merged_help_ngo_loc,
        polit_merged_volunt
      )
    ),
    # Political index: total over the three meeting / text items.
    polit_index = sum_unless_all_missing(
      dplyr::select(
        .,
        polit_merged_meet_safe,
        polit_merged_meet_unsafe,
        polit_merged_text
      )
    ),
    # Locality size in English. The Russian answer texts are matched exactly,
    # including their leading space; unmatched answers stay NA.
    locality_size_eng = factor(case_when(
      locality_cize %in% " Москва" ~ "Moscow",
      locality_cize %in% " Санкт-Петербург" ~ "Saint Petersburg",
      locality_cize %in% " В другом городе-миллионнике" ~ "Big city (over million of people)",
      locality_cize %in% c(
        " В городе от 500 тыс. до 1 млн. жителей",
        " В городе от 100 тыс. до  500 тыс. жителей",
        " В небольшом городе (менее 100 тыс. жителей)",
        " В деревне, селе, поселке городского типа, на ферме"
      ) ~ "Small city or village"
    ))
  ) %>%
  mutate(
    # %in% never returns NA, so a missing country counts as "not in the list"
    # in the two-way splits below.
    country_neighbours = factor(if_else(
      country_english %in% neighbours,
      "Shared border",
      "No shared border"
    )),
    # Three-way outcome: countries that are in neighbours but not in
    # neighbours_no_allies match neither branch and are left NA.
    # NOTE(review): presumably intended to drop them from this comparison -
    # confirm.
    country_neighbours_no_allies = factor(case_when(
      country_english %in% neighbours_no_allies ~ "Shared border",
      !country_english %in% neighbours ~ "No shared border"
    )),
    # The leading space in " Not CIS country" is part of the label; it makes
    # this level sort before "CIS country".
    country_cis = factor(if_else(
      country_english %in% cis_countries,
      "CIS country",
      " Not CIS country"
    )),
    # Discrimination flag, keyed on the original (Russian) country names.
    disc_country = factor(if_else(
      country_now_merged %in% c("Грузия", "Польша", "Латвия", "Литва", "Франция"),
      "1. High discrimination",
      "0. Low discrimination"
    ))
  )


# Binary activity indicators (1 when the corresponding index is above zero,
# 0 otherwise, NA when the index is NA) and a combined label:
# active when either indicator is 1, not active only when both are 0,
# NA in the remaining cases (e.g. one indicator 0 and the other missing).
df <- df %>%
  mutate(
    polit_bin = ifelse(polit_index > 0, 1, 0),
    civic_bin = ifelse(civic_index > 0, 1, 0),
    activ_bin = structure(
      factor(case_when(
        polit_bin %in% 1 | civic_bin %in% 1 ~ "2: Politically and civically active",
        polit_bin %in% 0 & civic_bin %in% 0 ~ "1: Not active"
      )),
      label = "Political and Civic activities"
    )
  )



# Political interest.
# Only the top answer ("Очень интересуюсь") counts as high interest; the
# three remaining answer options are grouped as low interest, and anything
# else stays NA.
df <- df %>%
  mutate(
    politics_interest_bin = factor(case_when(
      politics_interest_ru %in% "Очень интересуюсь" ~ "High interest",
      politics_interest_ru %in% c(
        "Скорее интересуюсь, чем нет",
        "Не очень интересуюсь",
        "Совсем не интересуюсь"
      ) ~ "Low interest"
    ))
  )

# Display label for the numeric version of the item.
label(df$politics_interest_ru_num) <- "Interest to politics in Russia (1- not interested 4 - very interested)"

# Migrant type.
# Branches are evaluated in order: respondents who left after the start of
# mobilisation are "Septembrist"; of the rest, when_left == "Да" gives
# "Februarist" and when_left == "Нет" gives "Earlier emigrants".
# If df has already been restricted to when_left == "Да", the last branch
# never fires.
df <- df %>%
  mutate(
    migranttype = factor(case_when(
      when_left_mobiliz %in% "После начала мобилизации (21 сентября 2022)" ~ "Septembrist",
      when_left %in% "Да" ~ "Februarist",
      when_left %in% "Нет" ~ "Earlier emigrants"
    ))
  )

# repression 
# df <- df %>% mutate(repress_bin = factor(case_when(repress_bin %in% 0 ~ "Wasn't repressed",
#                                repress_bin %in% 1 ~ "Was repressed")) %>% 
#                       structure(label = "Experience of repression in Russia"))


# Help to other Russian emigrants.
# Recode the 0/1 indicator into labelled categories. 1 is read as "helped",
# in line with the 0 = no / 1 = yes coding this script uses for its other
# binary items; the previous mapping had the two labels swapped.
# NOTE(review): confirm the coding of polit_90_help_rus against the codebook.
# Any other value, including NA, stays NA.
df <- df %>%
  mutate(
    polit_90_help_rus = structure(
      factor(case_when(
        polit_90_help_rus %in% 1 ~ "Helps emigrants",
        polit_90_help_rus %in% 0 ~ "Doesn't help emigrants"
      )),
      label = "Help to other Russian emigrants (last three months)"
    )
  )

# Relatives in Ukraine

# df <- df %>% mutate(ukrainians = factor(case_when(ukrainians %in% "Да" ~ "Have relatives in Ukraine",
#                                                   ukrainians %in% "Нет" ~ "Doesn't have relatives in Ukraine")) %>% 
#                       structure(label = "Relatives in Ukraine"))

# Tables to LaTeX.
# Each table is built with table1(), flattened to a data frame and rendered
# with knitr::kable() as a booktabs LaTeX table.

# soc dem table
table1(~sex, data = df) %>%
  as.data.frame() %>%
  knitr::kable(booktabs = TRUE, format = "latex")

# table 3 in appendix
table1(
  ~ sex + age_respondent + income_cat + education_cat + child_bin_cat +
    locality_size_eng,
  data = df
) %>%
  as.data.frame() %>%
  knitr::kable(booktabs = TRUE, format = "latex")

# table 3 in appendix
# NOTE(review): this is the second table labelled "table 3" - confirm the
# numbering.
table1(~country_cat, data = df) %>%
  as.data.frame() %>%
  knitr::kable(booktabs = TRUE, format = "latex")

# table 4 in appendix
# NOTE(review): responsiblity_bin and guilt_bin are not created in this
# script (their definitions are commented out), so they - like repress_bin
# and ukrainians - must already exist in the input data; otherwise this call
# fails.
table1(
  ~ repress_bin + polit_90_help_rus + ukrainians + responsiblity_bin +
    guilt_bin + activ_bin + country_neighbours + disc_country,
  data = df
) %>%
  as.data.frame() %>%
  knitr::kable(booktabs = TRUE, format = "latex")



